# -*- coding: utf-8 -*-
"""
@Time ： 2021/1/25 一缕青丝伴忧愁:10
@Auth ： 张张呀
@File ：小姐姐.py
@IDE ：PyCharm
@Motto：ABC(Always Be Coding)

"""
import os
import re
import time
from urllib.parse import urljoin, urlsplit

import requests

# HTTP request headers passed to every ``requests.get`` call in this script:
# a desktop-Chrome User-Agent string, browser-style Accept values, and a
# fixed Referer.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1 Safari/537.1",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Encoding": "gzip",
    "Referer": "https://www.baidu.com/",
}
# Substring that the script treats as proof that a post page exists.
PAGE_MARKER = "<style></style><meta name=keywords content="

# Post title; the last match on the page is used.
TITLE_RE = re.compile(r'<h1 class="post-title h1">(.*?)</h1>')

# Image-link patterns, tried in order; the first one that matches wins.
IMAGE_RES = (
    re.compile(r'<img alt=".*?" loading=lazy src="(.*?)" alt='),
    re.compile(r'<a href="(.*?)" alt=".*?" title=".*?">'),
    re.compile(r'style=".*?" data-pagespeed-lsc-url="https:(.*?)"/>'),
)

# Characters that are not allowed in file/directory names on common
# filesystems (path separators, Windows-reserved punctuation, control chars).
INVALID_NAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')

# Seconds to wait for a server response before giving up on a request.
REQUEST_TIMEOUT = 30


def sanitize_name(name):
    """Return *name* with filesystem-unsafe characters replaced by ``_``.

    Surrounding whitespace and trailing dots are removed as well. The result
    may be an empty string.
    """
    return INVALID_NAME_CHARS.sub("_", name).strip().rstrip(". ")


def extract_title(html):
    """Return the last post title found in *html*, or ``""`` if none."""
    titles = TITLE_RE.findall(html)
    return titles[-1] if titles else ""


def find_image_urls(html):
    """Return the image links matched by the first pattern that hits.

    The patterns in ``IMAGE_RES`` are tried in order; an empty list is
    returned when none of them matches.
    """
    for pattern in IMAGE_RES:
        links = pattern.findall(html)
        if links:
            return links
    return []


def resolve_url(page_url, link):
    """Turn a scraped *link* into an absolute URL.

    Protocol-relative (``//host/x``), root-relative and already-absolute
    links are all resolved against *page_url*.
    """
    return urljoin(page_url, link)


def file_name_for(link):
    """Return a safe local file name for *link* (query string dropped)."""
    last_segment = urlsplit(link).path.rsplit("/", 1)[-1]
    return sanitize_name(last_segment)


def download_post(post_id):
    """Download every image of the post with numeric id *post_id*.

    Images are stored in a directory named ``<post_id><title>`` under the
    current working directory. Network failures are reported and skipped so
    that one bad request does not abort the whole run.
    """
    page_url = "https://www.vmgirls.com/{0}.html".format(post_id)
    try:
        page = requests.get(page_url, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as err:
        print("{0}网页请求失败: {1}".format(post_id, err))
        return

    html = page.text
    if PAGE_MARKER not in html:
        print("{0}网页不存在".format(post_id))
        return

    # The title comes from the remote page, so it must be sanitised before
    # being used as a directory name.
    dir_name = sanitize_name(str(post_id) + extract_title(html))
    os.makedirs(dir_name, exist_ok=True)

    for link in find_image_urls(html):
        time.sleep(1)  # pause between image requests
        name = file_name_for(link)
        if not name:
            # The link has no usable final path segment; nothing to save it as.
            continue
        print(name + "正在下载")
        try:
            image = requests.get(
                resolve_url(page_url, link),
                headers=headers,
                timeout=REQUEST_TIMEOUT,
            )
            # Do not save an HTTP error page as if it were an image.
            image.raise_for_status()
        except requests.RequestException as err:
            print("{0}下载失败: {1}".format(name, err))
            continue
        with open(os.path.join(dir_name, name), "wb") as f:
            f.write(image.content)

    print("{0}下载完毕".format(post_id))


def main():
    """Prompt for an inclusive range of post ids and download each post."""
    first_id = int(input("请输入爬取网页的起始ID:"))  # example ids: 13874, 13344
    last_id = int(input("请输入爬取网页的结束ID:"))
    for post_id in range(first_id, last_id + 1):
        download_post(post_id)
    print("全部下载完毕")
    input("按回车键退出")


if __name__ == "__main__":
    main()
